\subsection{Notation}
Let \( f \colon (E, \mathcal E, \mu) \to \mathbb R \) be an `integrable' function, a notion we will define.
We will then define the integral with respect to \( \mu \), either written \( \mu(f) \) or \( \int_E f \dd{\mu} = \int_E f(x) \dd{\mu(x)} \).
If \( X \) is a random variable, we will define its expectation \( \expect{X} = \int_\Omega X \dd{\mathbb P} = \int_\Omega X(\omega) \dd{\mathbb P(\omega)} \).

\subsection{Definition}
We say that a function \( f \colon (E,\mathcal E,\mu) \to \mathbb R \) is \emph{simple} if it is of the form
\[ f = \sum_{k=1}^m a_k \symbb 1_{A_k};\quad a_k \geq 0;\quad A_k \in \mathcal E;\quad m \in \mathbb N \]
\begin{definition}
	The \emph{\( \mu \)-integral} of a simple function \( f \) defined as above is
	\[ \mu(f) = \sum_{k=1}^m a_k \mu(A_k) \]
	which is independent of the choice of representation of the simple function.
\end{definition}
\begin{remark}
	We have \( \mu(\alpha f + \beta g) = \alpha \mu(f) + \beta \mu(g) \) for all nonnegative coefficients \( \alpha, \beta \) and simple functions \( f, g \).
	If \( g \leq f \), \( \mu(g) \leq \mu(f) \), so \( \mu \) is increasing.
	If \( f = 0 \) almost everywhere, \( \mu(f) = 0 \).
\end{remark}
For a general non-negative measurable function \( f \colon (E,\mathcal E,\mu) \to \mathbb R \), we define its \( \mu \)-integral to be
\[ \mu(f) = \sup\qty{\mu(g) \mid g \leq f, g \text{ simple}} \]
which agrees with the above definition for simple functions.
This operator takes values in the extended non-negative real line \( [0,\infty] \).
Now, for \( f \colon (E,\mathcal E,\mu) \to \mathbb R \) measurable but not necessarily non-negative, we define \( f^+ = \max(f,0) \) and \( f^- = \max(-f,0) \), so that \( f = f^+ - f^- \) and \( \abs{f} = f^+ + f^- \).
\begin{definition}
	A measurable function \( f \colon (E,\mathcal E,\mu) \to \mathbb R \) is \emph{\( \mu \)-integrable} if \( \mu(\abs{f}) < \infty \).
	In this case, we define its integral to be
	\[ \mu(f) = \mu(f^+) - \mu(f^-) \]
	which is a well-defined real number.
\end{definition}

\subsection{Monotone convergence theorem}
\begin{theorem}
	Let \( f_n, f \colon (E,\mathcal E,\mu) \to \mathbb R \) be measurable and non-negative such that \( f_n \) increases pointwise to \( f \), so \( f_n(x) \leq f_{n+1}(x) \leq f(x) \) and \( f_n(x) \to f(x) \) as \( n \to \infty \).
	Then, \( \mu(f_n) \to \mu(f) \) as \( n \to \infty \).
\end{theorem}
\begin{remark}
	This is a theorem that allows us to interchange a pair of limits, \( \mu(f) = \mu\qty(\lim_n f_n) = \lim_n \mu(f_n) \).
	Also, if the \( g_n \) are measurable with \( g_n \geq 0 \), then \( \mu\qty(\sum_n g_n) = \sum_n \mu(g_n) \), by applying the theorem to the increasing partial sums.

	If we consider the approximating sequence \( \widetilde f_n = 2^{-n} \floor*{2^n f} \), as defined in the monotone class theorem, then this is a non-negative sequence increasing to \( f \).
	So in particular, \( \mu(f) \) is equal to the limit of the integrals of these simple functions.

	It suffices to require convergence of \( f_n \to f \) almost everywhere, the general argument does not need to change.
	The non-negativity constraint is not required if the first term in the sequence \( f_0 \) is integrable, by subtracting \( f_0 \) from every term.
\end{remark}
\begin{proof}
	Recall that \( \mu(f) = \sup\qty{\mu(g) \mid g \leq f, g \text{ simple}} \).
	Since \( f_n \) is an increasing sequence of nonnegative functions, \( \mu(f_n) \) is an increasing sequence of nonnegative extended real numbers.
	So it converges to its (\emph{extended} non-negative real) supremum \( M = \sup_n \mu(f_n) \).
	Since \( f_n \leq f \), \( \mu(f_n) \leq \mu(f) \), so taking suprema, \( M \leq \mu(f) \).
	If \( M \) is finite, \( \sup_n \mu(f_n) = \lim_n \mu(f_n) \leq \mu(f) \).
	If \( M \) is infinite, we are already done.

	Now, we need to show \( \mu(f) \leq M \), or equivalently, \( \mu(g) \leq M \) for all simple \( g \) such that \( g \leq f \), so that taking suprema, \( \mu(f) = \sup_g \mu(g) \leq M \).
	We define \( g_n = \min (\overline f_n, g) \), where \( \overline f_n \) is the \( n \)th approximation of \( f_n \) by simple functions from the monotone class theorem.
	Now, since \( f_n \) increases to \( f \), \( \overline f_n \) increases to \( f \).
	In particular, \( g_n = \min(\overline f_n, g) \) increases to \( \min(f, g) = g \).
	Since \( \overline f_n \leq f_n \) by definition, we have \( g_n \leq f_n \) for all \( n \).

	Now write the simple function \( g \) in the form \( g = \sum_{k=1}^m a_k \symbb 1_{A_k} \) where \( a_k \geq 0 \) and the \( A_k \in \mathcal E \) are disjoint.
	For \( \varepsilon > 0 \), we define sets \( A_k(n) = \qty{x \in A_k \mid g_n(x) \geq (1-\varepsilon) a_k} \).
	Since \( g = a_k \) on \( A_k \), and since \( g_n \) increases to \( g \), we must have \( A_k(n) \) increases to \( A_k \) for all \( k \).
	Since \( \mu \) is a measure, \( \mu(A_k(n)) \) increases to \( \mu(A_k) \) by countable additivity.

	We have \( g_n \symbb 1_{A_k} \geq g_n \symbb 1_{A_k(n)} \geq (1-\varepsilon)a_k \symbb 1_{A_k(n)} \) on \( E \).
	Moreover, \( g_n = \sum_{k=1}^m g_n \symbb 1_{A_k} \) since the \( A_k \) are disjoint and support \( g_n \).
	Hence, \( g_n \geq \sum_{k=1}^m (1-\varepsilon)a_k \symbb 1_{A_k(n)} \), and in particular, \( \mu(g_n) \geq (1 - \varepsilon) \sum_{k=1}^m a_k \mu(A_k(n)) \).
	The right hand side increases to \( (1-\varepsilon) \sum_{k=1}^m a_k \mu(A_k) = (1-\varepsilon) \mu(g) \).
	Hence
	\[ \mu(g) \leq \frac{1}{1-\varepsilon} \limsup_n \mu(g_n) \leq \frac{1}{1-\varepsilon} \limsup_n \mu(f_n) \leq \frac{M}{1-\varepsilon} \]
	Since \( \varepsilon \) was arbitrary, this completes the proof.
\end{proof}

\subsection{Linearity of integral}
\begin{theorem}
	Let \( f, g \colon (E, \mathcal E, \mu) \to \mathbb R \) be nonnegative measurable functions.
	Then \( \mu(\alpha f + \beta g) = \alpha \mu(f) + \beta \mu(g) \) for all \( \alpha, \beta \geq 0 \).
	Further, if \( g \leq f \), then \( \mu(g) \leq \mu(f) \).
	Finally, \( f = 0 \) almost everywhere if and only if \( \mu(f) = 0 \).
\end{theorem}
\begin{proof}
	If \( \widetilde f_n, \widetilde g_n \) are the approximations of \( f \) and \( g \) by simple functions from the monotone class theorem, \( \alpha \widetilde f_n \) increases to \( \alpha f \) and \( \beta \widetilde g_n \) increases to \( \beta g \), so \( \alpha \widetilde f_n + \beta \widetilde g_n \) increases to \( \alpha f + \beta g \).
	Integrating both sides and using the monotone convergence theorem, the result follows, since linearity of the integral on simple functions is simple to prove.

	The second part \( g \leq f \implies \mu(g) \leq \mu(f) \) has already been proven.
	Now, if \( f = 0 \) almost everywhere, then since \( 0 \leq \widetilde f_n \leq f \), each approximation \( \widetilde f_n \) must be zero almost everywhere.
	So \( \mu(\widetilde f_n) = 0 \) so \( \mu(f) = 0 \).
	Conversely, if \( \mu(f) = 0 \), then \( 0 \leq \mu(\widetilde f_n) \to 0 \) gives \( \mu(\widetilde f_n) = 0 \) so \( \widetilde f_n = 0 \) almost everywhere.
	Since \( 0 = \widetilde f_n \) increases almost everywhere to \( f \), \( f \) is zero almost everywhere.
\end{proof}
\begin{remark}
	Functions such as \( \symbb 1_{\mathbb Q} \) are integrable and have integral zero.
	They are `identified' with the zero element in the theory of integration.
\end{remark}
\begin{theorem}
	Let \( f, g \colon (E, \mathcal E, \mu) \to \mathbb R \) be integrable functions.
	Then \( \mu(\alpha f + \beta g) = \alpha \mu(f) + \beta \mu(g) \) for all \( \alpha, \beta \in \mathbb R \); if \( g \leq f \), then \( \mu(g) \leq \mu(f) \); and if \( f = 0 \) almost everywhere, we have \( \mu(f) = 0 \).
\end{theorem}
\begin{proof}
	Clearly, if \( f \) is integrable, so is \( \alpha f \), and \( \mu(-f) = -\mu(f) \), by definition of the integral for a general function.
	We can explicitly check that for \( \alpha \geq 0 \), we have \( \mu(\alpha f) = \mu((\alpha f)^+) - \mu((\alpha f)^-) = \alpha \mu(f^+) - \alpha \mu(f^-) = \alpha \mu(f) \).
	Define \( h = f + g \).
	Then \( h^+ + f^- + g^- = h^- + f^+ + g^+ \), so by the previous theorem, \( \mu(h^+) + \mu(f^-) + \mu(g^-) = \mu(h^-) + \mu(f^+) + \mu(g^+) \) and the result holds.

	Finally, if \( 0 \leq f - g \), we have \( 0 \leq \mu(0) \leq \mu(f - g) = \mu(f) - \mu(g) \) so the result follows.
	If \( f = 0 \) almost everywhere, \( f^+ = 0 \) and \( f^- = 0 \) almost everywhere, so \( \mu(f) = 0 \).
\end{proof}

\subsection{Fatou's lemma}
\begin{lemma}
	Let \( f_n \colon (E, \mathcal E, \mu) \to \mathbb R \) be nonnegative measurable functions.
	Then \( \mu (\liminf_n f_n) \leq \liminf_n \mu(f_n) \).
\end{lemma}
\begin{remark}
	Recall that \( \liminf_n x_n = \sup_n \inf_{m \geq n} x_m \) and \( \limsup_n x_n = \inf_n \sup_{m \geq n} x_m \).
	In particular, \( \limsup_n x_n = \liminf_n x_n \) implies that \( \lim_n x_n \) exists and is equal to \( \limsup_n x_n \) and \( \liminf_n x_n \).
	Hence, if the \( f_n \) converge to some measurable function \( f \), we must have \( \mu(f) \leq \liminf_n \mu(f_n) \).
\end{remark}
\begin{proof}
	We have \( \inf_{m \geq n} f_m \leq f_k \) for all \( k \geq n \), so by taking integrals, \( \mu\qty(\inf_{m \geq n} f_m) \leq \mu(f_k) \).
	Thus,
	\[ \mu\qty(\inf_{m \geq n} f_m) \leq \inf_{k \geq n} \mu(f_k) \leq \sup_n \inf_{k \geq n} \mu(f_k) = \liminf_n \mu(f_n) \]
	Note that \( \inf_{m \geq n} f_m \) increases to \( \sup_n \inf_{m \geq n} f_m = \liminf_n f_n \).
	By the monotone convergence theorem,
	\[ \mu\qty(\liminf_n f_n) = \lim_n \mu\qty(\inf_{m \geq n} f_m) \leq \liminf_n \mu(f_n) \]
	as required.
\end{proof}

\subsection{Dominated convergence theorem}
\begin{theorem}
	Let \( f_n, f \colon (E, \mathcal E, \mu) \to \mathbb R \) be measurable functions such that \( \abs{f_n} \leq g \) almost everywhere on \( E \), and the dominating function \( g \) is \( \mu \)-integrable, so \( \mu(g) < \infty \).
	Suppose \( f_n \to f \) pointwise (or almost everywhere) on \( E \).
	Then \( f_n \) and \( f \) are also integrable, and \( \mu(f_n) \to \mu(f) \) as \( n \to \infty \).
\end{theorem}
\begin{proof}
	Clearly \( \mu(\abs{f_n}) \leq \mu(g) < \infty \), so the \( f_n \) are integrable.
	Taking limits in \( \abs{f_n} \leq g \), we have \( \abs{f} \leq g \), so \( f \) is also integrable by the same argument.
	Now, \( g \pm f_n \) is a nonnegative function, and converges pointwise to \( g \pm f \).
	Since limits are equal to the limit inferior when they exist, by Fatou's lemma, we have
	\[ \mu(g) + \mu(f) = \mu(g + f) = \mu\qty(\liminf_n (g + f_n)) \leq \liminf_n \mu(g + f_n) = \mu(g) + \liminf_n \mu(f_n) \]
	Hence \( \mu(f) \leq \liminf_n \mu(f_n) \).
	Likewise, applying Fatou's lemma to \( g - f_n \), we have \( \mu(g) - \mu(f) \leq \liminf_n \mu(g - f_n) = \mu(g) - \limsup_n \mu(f_n) \), so \( \mu(f) \geq \limsup_n \mu(f_n) \), so
	\[ \limsup_n \mu(f_n) \leq \mu(f) \leq \liminf_n \mu(f_n) \]
	But since \( \liminf_n \mu(f_n) \leq \limsup_n \mu(f_n) \), the result follows.
\end{proof}
\begin{example}
	Let \( E = [0,1] \) with the Lebesgue measure.
	Let \( f_n \) be measurable functions with \( f_n \to f \) pointwise, and suppose the \( f_n \) are uniformly bounded, so \( \sup_n \norm{f_n}_\infty \leq g \) for some \( g \in \mathbb R \).
	Then since \( \mu(g) = g < \infty \), the dominated convergence theorem implies that \( f_n, f \) are integrable and \( \mu(f_n) \to \mu(f) \) as \( n \to \infty \).
	In particular, no notion of uniform convergence of the \( f_n \) is required.
\end{example}
\begin{remark}
	The proof of the fundamental theorem of calculus requires only the fact that
	\[ \int_x^{x + h} \dd{t} = h \]
	This is a fact which is obviously true of the Riemann integral and also of the Lebesgue integral.
	Therefore, for any continuous function \( f \colon [0,1] \to \mathbb R \), we have
	\[ \underbrace{\int_0^x f(t) \dd{t}}_{\text{Riemann integral}} = F(x) = \underbrace{\int_0^x f(t) \dd{\mu(t)}}_{\text{Lebesgue integral}} \]
	So these integrals coincide for continuous functions.
	We can show that all Riemann integrable functions are \( \mu^\star \)-measurable, where \( \mu^\star \) is the outer measure of the Lebesgue measure, as defined in the proof of Carath\'eodory's theorem.
	However, there exist certain Riemann integrable functions that are not Borel measurable.
	We can find that a bounded \( \mu^\star \)-measurable function is Riemann integrable if and only if
	\[ \mu\qty(\qty{x \in [0,1] \mid f \text{ is discontinuous at } x}) = 0 \]
	The standard techniques of Riemann integration, such as substitution and integration by parts, extend to all bounded measurable functions by the monotone class theorem.
\end{remark}
\begin{theorem}
	Let \( U \subseteq \mathbb R \) be an open set and \( (E, \mathcal E, \mu) \) be a measure space.
	Let \( f \colon U \times E \to \mathbb R \) be a map such that \( x \mapsto f(t, x) \) is \( \mu \)-integrable for each \( t \in U \), and \( t \mapsto f(t,x) \) is differentiable for each \( x \in E \), where \( \abs{\pdv{f}{t}} < g(x) \) for all \( t \in U \), and \( g \) is \( \mu \)-integrable.
	Then
	\[ F(t) = \int_E f(t,x) \dd{\mu(x)} \implies F'(t) = \int_E \pdv{f}{t}\qty(t,x) \dd{\mu(x)} \]
\end{theorem}
\begin{proof}
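	By the mean value theorem, for each \( x \in E \) and each sufficiently small \( h \neq 0 \), there exists \( \widetilde t \) between \( t \) and \( t + h \) (depending on \( x \) and \( h \)) such that \( f(t + h, x) - f(t, x) = h \pdv{f}{t}\qty(\widetilde t, x) \).
	Hence,
	\[ g_h(x) = \frac{f(t + h, x) - f(t, x)}{h} - \pdv{f}{t}\qty(t,x) \implies \abs{g_h(x)} = \abs{\pdv{f}{t}\qty(\widetilde t, x) - \pdv{f}{t}\qty(t, x)} \leq 2g(x) \]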
	Note that \( g \) is \( \mu \)-integrable.
	By differentiability of \( f \), we have \( g_h \to 0 \) as \( h \to 0 \), so applying the dominated convergence theorem, \( \mu(g_h) \to \mu(0) = 0 \).
	By linearity of the integral,
	\[ \mu(g_h) = \frac{\int_E f(t + h, x) - f(t, x) \dd{\mu(x)}}{h} - \int_E \pdv{f}{t}\qty(t,x) \dd{\mu(x)} \]
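	Hence, \( \frac{F(t+h) - F(t)}{h} - \int_E \pdv{f}{t}\qty(t,x) \dd{\mu(x)} \to 0 \) as \( h \to 0 \), so \( F \) is differentiable at \( t \) with the claimed derivative.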
\end{proof}
\begin{example}
	For a measurable function \( f \colon (E, \mathcal E, \mu) \to (G, \mathcal G) \), if \( g \colon G \to \mathbb R \) is a nonnegative measurable function, we show on an example sheet that
	\[ \mu \circ f^{-1}(g) = \int_G g \dd{\mu\circ f^{-1}} = \int_E g(f(x)) \dd{\mu(x)} = \mu(g \circ f) \]
	On a probability space \( (\Omega, \mathcal F, \mathbb P) \) and a \( G \)-valued random variable \( X \), we then compute
	\[ \expect{g(X)} = \mu_X(g) = \int_\Omega g(X(\omega)) \dd{\mathbb P(\omega)} = \int_\Omega g(X) \dd{\mathbb {P}} \]
\end{example}
\begin{example}[measures with densities]
	If \( f \colon (E, \mathcal E, \mu) \to \mathbb R \) is a nonnegative measurable function, we can define \( \nu_f(A) = \mu(f \symbb 1_A) \) for any measurable set \( A \), which is again a measure on \( (E, \mathcal E) \) by the monotone convergence theorem.
	In particular, if \( g \colon (E, \mathcal E) \to \mathbb R \) is nonnegative and measurable, \( \nu_f(g) = \int_E g(x) f(x) \dd{\mu(x)} = \int_E g \dd{\nu_f} \).
	We call \( f \) the \emph{density} of \( \nu_f \) with respect to \( \mu \).
	If its integral is one, it is called a \emph{probability density function}.
\end{example}
